{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from shutil import copytree\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "FROM_PATH=Path('data/audio_google')\n",
    "TO_PATH=Path('data/audio_google4alignment')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Copy audio folders for alignment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "WindowsPath('data/audio_google4alignment')"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copytree(FROM_PATH, TO_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## create .lab files with text in audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_label2wav_files(audio_folder):\n",
    "    audio_folder=Path(audio_folder)\n",
    "    speech_label=os.path.split(audio_folder)[-1]\n",
    "    wav_files=list(list(audio_folder.glob('*.wav')))\n",
    "    #add label because for alignment names cannot repeat across folders\n",
    "    wav_files=[file.replace(str(file).replace('.wav', f'{speech_label}.wav')) for file in wav_files]  \n",
    "    \n",
    "def create_folder_lab_files(audio_folder):\n",
    "    audio_folder=Path(audio_folder)\n",
    "    speech_label=os.path.split(audio_folder)[-1]\n",
    "    wav_files=list(list(audio_folder.glob('*.wav')))           \n",
    "    lab_files=[str(file).replace('.wav', '.lab') for file in wav_files]\n",
    "    \n",
    "    for lab_file in lab_files:\n",
    "        Path(lab_file).write_text(speech_label.upper())        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_subdir_lab_files(audio_folder, rename_wavs=False):\n",
    "    if rename_wavs:\n",
    "        add_label2wav_files(audio_folder)\n",
    "    for command in os.listdir(audio_folder):\n",
    "        print(f'working on folder {command}')\n",
    "        create_folder_lab_files(TO_PATH/command)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for command in os.listdir(TO_PATH)[:1]:\n",
    "#     print(f'working on folder {command}')\n",
    "#     create_folder_lab_files(TO_PATH/command)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "working on folder down\n",
      "working on folder go\n",
      "working on folder left\n",
      "working on folder no\n",
      "working on folder right\n",
      "working on folder stop\n",
      "working on folder up\n",
      "working on folder yes\n"
     ]
    }
   ],
   "source": [
    "create_subdir_lab_files(TO_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
